library(readxl)
library(dplyr)
library(stringr)
library(purrr)
library(lubridate)
library(data.table)
library(plm)
library(lmtest)
library(sandwich)
library(ggplot2)

##############
## Analysis ##
##############
pdata <- read.csv("navco_country_final.csv")

library(slider)
library(lubridate)

pdata <- pdata %>%
  mutate(event_date = as.Date(event_date)) %>%
  arrange(COWcode, event_date) %>%
  group_by(COWcode) %>%
  mutate(
    n_events_recent365 = slide_index_int(
      .x = event_date,    
      .i = event_date,    
      .f = ~length(.x),   
      .before = days(365),  
      .complete = FALSE
    )
  ) %>%
  ungroup()

pdata <- pdata.frame(pdata, index = c("COWcode", "event_date"))

pdata_clmm <- pdata %>%
  filter(!is.na(participants_lead),
         !is.na(fatality_diff_signed_weighted),
         !is.na(v2x_polyarchy),
         !is.na(v2xel_locelec),
         !is.na(gdppc),
         !is.na(pop))

poly_terms <- poly(pdata_clmm$fatality_diff_signed_weighted, 2)
pdata_clmm$poly1 <- poly_terms[, 1]
pdata_clmm$poly2 <- poly_terms[, 2]

m_clmm_quad <- plm(participants_lead ~ poly1 + poly2 + n_events_recent365 +
                      v2x_polyarchy + v2xel_locelec + gdppc + pop,
                   data = pdata_clmm,
                   model = "within",
                   index = c("COWcode", "event_date"),
                   effect = "twoways")

coefs <- coef(m_clmm_quad)
vcov_mat <- vcov(m_clmm_quad)

navco_result1_country <- m_clmm_quad

x_seq <- seq(min(pdata_clmm$fatality_diff_signed_weighted, na.rm = TRUE),
             max(pdata_clmm$fatality_diff_signed_weighted, na.rm = TRUE),
             length.out = 100)

poly_deriv <- predict(poly_terms, newdata = x_seq, deriv = 1)

grad_matrix <- cbind(poly_deriv[, 1], poly_deriv[, 2])
colnames(grad_matrix) <- c("poly1", "poly2")

marginal_effect <- grad_matrix[, 1] * coefs["poly1"] +
  grad_matrix[, 2] * coefs["poly2"]

marginal_se <- apply(grad_matrix, 1, function(g) {
  sqrt(t(g) %*% vcov_mat[c("poly1", "poly2"), c("poly1", "poly2")] %*% g)
})

prediction_data <- data.frame(
  x = x_seq,
  marginal_effect = marginal_effect,
  ci_low = marginal_effect - 1.96 * marginal_se,
  ci_high = marginal_effect + 1.96 * marginal_se
)

navco_marginal1_country <- ggplot(prediction_data, aes(x = x, y = marginal_effect)) +
  geom_line(color = "black", size = 0.5) +
  geom_ribbon(aes(ymin = ci_low, ymax = ci_high), fill = "black", alpha = 0.3) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  labs(
    title = "Marginal Effect of Repression Deviations\n(NAVCO: Country)",
    x = "Repression Deviations",
    y = "Marginal Effect on Protest Participation (log)"
  ) +
  theme_minimal() +
  theme(
    text = element_text(size = 14),
    plot.title = element_text(face = "bold", size = 16, hjust = 0.5),
    axis.title.x = element_text(size = 15, margin = margin(t = 15)),
    axis.title.y = element_text(size = 15, margin = margin(r = 15)),
    axis.text = element_text(size = 15)
  )

print(navco_marginal1_country)

# Step 1: Prepare data
pdata_clmm <- pdata %>%
  filter(!is.na(participants_lead),
         !is.na(fatalities_log),
         !is.na(v2x_polyarchy),
         !is.na(v2xel_locelec),
         !is.na(gdppc),
         !is.na(pop))

poly_terms <- poly(pdata_clmm$fatalities_log, 2)
pdata_clmm$poly1 <- poly_terms[, 1]
pdata_clmm$poly2 <- poly_terms[, 2]

m_clmm_quad <- plm(participants_lead ~ poly1 + poly2 + n_events_recent365 +
                      v2x_polyarchy + v2xel_locelec + gdppc + pop,
                   data = pdata_clmm,
                   model = "within",
                   index = c("COWcode", "event_date"),
                   effect = "twoways")

navco_country <- m_clmm_quad

coefs <- coef(m_clmm_quad)
vcov_mat <- vcov(m_clmm_quad)

x_seq <- seq(min(pdata_clmm$fatalities_log, na.rm = TRUE),
             max(pdata_clmm$fatalities_log, na.rm = TRUE),
             length.out = 100)

poly_deriv <- predict(poly_terms, newdata = x_seq, deriv = 1)

grad_matrix <- cbind(poly_deriv[, 1], poly_deriv[, 2])
colnames(grad_matrix) <- c("poly1", "poly2")

marginal_effect <- grad_matrix[, 1] * coefs["poly1"] +
  grad_matrix[, 2] * coefs["poly2"]

marginal_se <- apply(grad_matrix, 1, function(g) {
  sqrt(t(g) %*% vcov_mat[c("poly1", "poly2"), c("poly1", "poly2")] %*% g)
})

prediction_data <- data.frame(
  ndeath_log = x_seq,
  marginal_effect = marginal_effect,
  ci_low = marginal_effect - 1.96 * marginal_se,
  ci_high = marginal_effect + 1.96 * marginal_se
)

navco_marginal2_country <- ggplot(prediction_data, aes(x = ndeath_log, y = marginal_effect)) +
  geom_line(color = "black", size = 0.5) +
  geom_ribbon(aes(ymin = ci_low, ymax = ci_high), fill = "black", alpha = 0.3) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  labs(
    title = "Marginal Effect of Simple Repression\n(NAVCO: Country)",
    x = "Fatalities",
    y = "Marginal Effect on Protest Participation (log)"
  ) +
  theme_minimal() +
  theme(
    text = element_text(size = 14),
    plot.title = element_text(face = "bold", size = 16, hjust = 0.5),
    axis.title.x = element_text(size = 15, margin = margin(t = 15)),
    axis.title.y = element_text(size = 15, margin = margin(r = 15)),
    axis.text = element_text(size = 15)
  )

print(navco_marginal2_country)

# Step 1: Prepare data
pdata_clmm_exp <- pdata %>%
  filter(!is.na(participants_lead),
         !is.na(fatality_diff_signed_weighted_exp),
         !is.na(v2x_polyarchy),
         !is.na(v2xel_locelec),
         !is.na(gdppc),
         !is.na(pop))

# Step 2: Fit quadratic plm model with exponential variable
poly_terms_exp <- poly(pdata_clmm_exp$fatality_diff_signed_weighted_exp, 2)
pdata_clmm_exp$poly1 <- poly_terms_exp[, 1]
pdata_clmm_exp$poly2 <- poly_terms_exp[, 2]

m_clmm_quad_exp <- plm(participants_lead ~ poly1 + poly2 + n_events_recent365 +
                         v2x_polyarchy + v2xel_locelec + gdppc + pop,
                       data = pdata_clmm_exp,
                       model = "within",
                       index = c("COWcode", "event_date"),
                       effect = "twoways")

# Step 3: Prepare prediction data
coefs_exp <- coef(m_clmm_quad_exp)
vcov_mat_exp <- vcov(m_clmm_quad_exp)

navco_result1_country_exp <- m_clmm_quad_exp

x_seq_exp <- seq(min(pdata_clmm_exp$fatality_diff_signed_weighted_exp, na.rm = TRUE),
                 max(pdata_clmm_exp$fatality_diff_signed_weighted_exp, na.rm = TRUE),
                 length.out = 100)

poly_deriv_exp <- predict(poly_terms_exp, newdata = x_seq_exp, deriv = 1)

grad_matrix_exp <- cbind(poly_deriv_exp[, 1], poly_deriv_exp[, 2])
colnames(grad_matrix_exp) <- c("poly1", "poly2")

marginal_effect_exp <- grad_matrix_exp[, 1] * coefs_exp["poly1"] +
  grad_matrix_exp[, 2] * coefs_exp["poly2"]

marginal_se_exp <- apply(grad_matrix_exp, 1, function(g) {
  sqrt(t(g) %*% vcov_mat_exp[c("poly1", "poly2"), c("poly1", "poly2")] %*% g)
})

prediction_data_exp <- data.frame(
  x = x_seq_exp,
  marginal_effect = marginal_effect_exp,
  ci_low = marginal_effect_exp - 1.96 * marginal_se_exp,
  ci_high = marginal_effect_exp + 1.96 * marginal_se_exp
)

# Step 4: Plot
navco_marginal1_country_exp <- ggplot(prediction_data_exp, aes(x = x, y = marginal_effect)) +
  geom_line(color = "black", size = 0.5) +
  geom_ribbon(aes(ymin = ci_low, ymax = ci_high), fill = "black", alpha = 0.3) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  labs(
    title = "Marginal Effect of Repression Deviations\n(NAVCO: Country, Exponential Decay)",
    x = "Repression Deviations (Exponential Decay)",
    y = "Marginal Effect on Protest Participation (log)"
  ) +
  theme_minimal() +
  theme(
    text = element_text(size = 14),
    plot.title = element_text(face = "bold", size = 16, hjust = 0.5),
    axis.title.x = element_text(size = 15, margin = margin(t = 15)),
    axis.title.y = element_text(size = 15, margin = margin(r = 15)),
    axis.text = element_text(size = 15)
  )

# Step 5: Print plot
print(navco_marginal1_country_exp)

